From c2929ecdf6cba5d5e5fd09c52e465e5f4a22fb83 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Tue, 23 Mar 2004 09:33:07 +0000 Subject: [PATCH] bitkeeper revision 1.820 (40600453bCkH6oPCubNeqIe3OBUXGQ) io.h: new file Many files: Further IO virtualisation patches. --- .rootkeys | 1 + xen/common/domain.c | 4 +- xen/common/physdev.c | 84 +++- xen/include/hypervisor-ifs/network.h | 12 +- xen/include/xeno/vif.h | 15 +- xen/net/dev.c | 8 +- xenolinux-2.4.25-sparse/arch/xeno/config.in | 4 + .../arch/xeno/drivers/network/network.c | 19 +- .../arch/xeno/drivers/vnetif/vnetif.c | 19 +- .../arch/xeno/kernel/hypervisor.c | 43 +- .../arch/xeno/kernel/i386_ksyms.c | 14 +- .../arch/xeno/kernel/physirq.c | 54 ++- xenolinux-2.4.25-sparse/include/asm-xeno/io.h | 430 ++++++++++++++++++ xenolinux-2.4.25-sparse/mkbuildtree | 3 +- 14 files changed, 637 insertions(+), 73 deletions(-) create mode 100644 xenolinux-2.4.25-sparse/include/asm-xeno/io.h diff --git a/.rootkeys b/.rootkeys index aeda8c823a..26a04cd31c 100644 --- a/.rootkeys +++ b/.rootkeys @@ -666,6 +666,7 @@ 3e5a4e67w_DWgjIJ17Tlossu1LGujQ xenolinux-2.4.25-sparse/include/asm-xeno/highmem.h 3e5a4e67YtcyDLQsShhCfQwPSELfvA xenolinux-2.4.25-sparse/include/asm-xeno/hw_irq.h 3e5a4e677VBavzM1UZIEcH1B-RlXMA xenolinux-2.4.25-sparse/include/asm-xeno/hypervisor.h +4060044fVx7-tokvNLKBf_6qBB4lqQ xenolinux-2.4.25-sparse/include/asm-xeno/io.h 3e5a4e673p7PEOyHFm3nHkYX6HQYBg xenolinux-2.4.25-sparse/include/asm-xeno/irq.h 3ead095db_LRUXnxaqs0dA1DWhPoQQ xenolinux-2.4.25-sparse/include/asm-xeno/keyboard.h 3e5a4e678ddsQOpbSiRdy1GRcDc9WA xenolinux-2.4.25-sparse/include/asm-xeno/mmu_context.h diff --git a/xen/common/domain.c b/xen/common/domain.c index e61f02a26a..17cfc733ba 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -150,7 +150,7 @@ void __kill_domain(struct task_struct *p) if ( !sched_rem_domain(p) ) return; - printk("Killing domain %llu\n", p->domain); + DPRINTK("Killing domain %llu\n", p->domain); 
unlink_blkdev_info(p); @@ -482,7 +482,7 @@ void release_task(struct task_struct *p) ASSERT(p->state == TASK_DYING); ASSERT(!p->has_cpu); - printk("Releasing task %llu\n", p->domain); + DPRINTK("Releasing task %llu\n", p->domain); /* * This frees up blkdev rings and vbd-access lists. Totally safe since diff --git a/xen/common/physdev.c b/xen/common/physdev.c index 6daeb169e3..a95f496153 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -24,7 +24,15 @@ * size of teh region, is faked out by a very simple state machine, * preventing direct writes to the PCI config registers by a guest. * - * XXX Some comment on IRQ handling + * Interrupt handling is currently done in a very cheesy fashion. + * We take the default irq controller code and replace it with our own. + * If an interrupt comes in it is acked using the PIC's normal routine. Then + * an event is sent to the receiving domain which has to explicitly call + * once it is finished dealing with the interrupt. Only then the PIC's end + * handler is called. Very cheesy with all sorts of problems but it seems + * to work in normal cases. No shared interrupts are allowed. + * + * XXX this code is not SMP safe at the moment! */ @@ -77,6 +85,12 @@ typedef struct _phys_dev_st /* an array of device descriptors index by IRQ number */ static phys_dev_t *irqs[MAX_IRQS]; +/* + * + * General functions + * + */ + /* find a device on the device list */ static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev) { @@ -237,6 +251,11 @@ inline static int check_dev_acc (struct task_struct *p, return 0; } +/* + * + * PCI config space access + * + */ /* * Base address registers contain the base address for IO regions. 
@@ -313,6 +332,7 @@ static int do_base_address_access(phys_dev_t *pdev, int acc, if ( res->flags & IORESOURCE_MEM ) { + /* this is written out explicitly for clarity */ *val = 0xffffffff; /* bit 0 = 0 */ /* bit 21 = memory type */ @@ -501,6 +521,13 @@ static long pci_cfgreg_write(int seg, int bus, int dev, int func, int reg, func, reg, len, &val); return ret; break; +#if 0 + case 0xe0: /* XXX some device drivers seem to write to this.... */ + printk("pci write hack allowed %02x:%02x:%02x: " + "reg=0x%02x len=0x%02x val=0x%08x\n", + bus, dev, func, reg, len, val); + break; +#endif default: //if ( pdev->flags != ACC_WRITE ) /* XXX for debug we disallow all write access */ @@ -520,6 +547,12 @@ static long pci_cfgreg_write(int seg, int bus, int dev, int func, int reg, return ret; } +/* + * + * Interrupt handling + * + */ + /* * return the IRQ xen assigned to the device. @@ -552,26 +585,23 @@ static void phys_dev_interrupt(int irq, void *dev_id, struct pt_regs *ptregs) return; } - //printk("irq %d pdev=%p\n", irq, pdev); - p = pdev->owner; - //printk("owner %p\n", p); - if ( test_bit(irq, &p->shared_info->physirq_pend) ) { - printk("irq %d already delivered to guest\n", irq); + /* Some interrupt already delivered to guest */ return; } + /* notify guest */ set_bit(irq, &p->shared_info->physirq_pend); set_bit(ST_IRQ_DELIVERED, &pdev->state); - cpu_mask |= mark_guest_event(p, _EVENT_TIMER); + cpu_mask |= mark_guest_event(p, _EVENT_PHYSIRQ); guest_event_notify(cpu_mask); } /* this is called instead of the PICs original end handler. - * the real end handler is only called once the guest ack'ed the handling + * the real end handler is only called once the guest signalled the handling * of the event. 
*/ static void end_virt_irq (unsigned int i) { @@ -610,8 +640,6 @@ static long pci_request_irq(int irq) return -EINVAL; } - printk("pdev= %p\n", pdev); - if ( irq >= MAX_IRQS ) { printk("requested IRQ to big %d\n", irq); @@ -651,8 +679,9 @@ static long pci_request_irq(int irq) printk ("setup handler %d\n", irq); - /* request the IRQ. this is not shared! */ - err = request_irq(irq, phys_dev_interrupt, 0, "network", (void *)pdev); + /* request the IRQ. this is not shared and we use a slow handler! */ + err = request_irq(irq, phys_dev_interrupt, SA_INTERRUPT, + "foo", (void *)pdev); if ( err ) { printk("error requesting irq\n"); @@ -670,7 +699,35 @@ static long pci_request_irq(int irq) static long pci_free_irq(int irq) { - /* XXX restore original handler and free_irq() */ + phys_dev_t *pdev; + + if ( irq >= MAX_IRQS ) + { + printk("requested IRQ to big %d\n", irq); + return -EINVAL; + } + + if ( irqs[irq] == NULL ) + { + printk ("irq not used %d\n", irq); + return -EINVAL; + } + + pdev = irqs[irq]; + + /* shutdown IRQ */ + free_irq(irq, (void *)pdev); + + /* restore irq controller */ + irq_desc[irq].handler = pdev->orig_handler; + + /* clean up */ + pdev->orig_handler = NULL; + irqs[irq] = NULL; + kfree(pdev->new_handler); + pdev->new_handler = NULL; + + printk("freed irq %d", irq); return 0; } @@ -724,6 +781,7 @@ static long pci_finished_irq(int irq) return 0; } + /* * demux hypervisor call. */ diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h index def5a6ee90..7b00a5369a 100644 --- a/xen/include/hypervisor-ifs/network.h +++ b/xen/include/hypervisor-ifs/network.h @@ -73,16 +73,16 @@ typedef union rx_entry_st } rx_entry_t; -#define TX_RING_SIZE 256 -#define RX_RING_SIZE 256 +#define XENNET_TX_RING_SIZE 256 +#define XENNET_RX_RING_SIZE 256 #define MAX_DOMAIN_VIFS 8 /* This structure must fit in a memory page. 
*/ typedef struct net_ring_st { - tx_entry_t tx_ring[TX_RING_SIZE]; - rx_entry_t rx_ring[RX_RING_SIZE]; + tx_entry_t tx_ring[XENNET_TX_RING_SIZE]; + rx_entry_t rx_ring[XENNET_RX_RING_SIZE]; } net_ring_t; /* @@ -96,8 +96,8 @@ typedef unsigned int NET_RING_IDX; * size of the ring buffer. The following macros convert a free-running counter * into a value that can directly index a ring-buffer array. */ -#define MASK_NET_RX_IDX(_i) ((_i)&(RX_RING_SIZE-1)) -#define MASK_NET_TX_IDX(_i) ((_i)&(TX_RING_SIZE-1)) +#define MASK_NET_RX_IDX(_i) ((_i)&(XENNET_RX_RING_SIZE-1)) +#define MASK_NET_TX_IDX(_i) ((_i)&(XENNET_TX_RING_SIZE-1)) typedef struct net_idx_st { diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h index 0da8b2bd99..23626b7342 100644 --- a/xen/include/xeno/vif.h +++ b/xen/include/xeno/vif.h @@ -24,11 +24,12 @@ extern struct net_device *the_dev; -/* - * shadow ring structures are used to protect the descriptors from - * tampering after they have been passed to the hypervisor. - * - * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h. +/* + * shadow ring structures are used to protect the descriptors from tampering + * after they have been passed to the hypervisor. + * + * XENNET_TX_RING_SIZE and XENNET_RX_RING_SIZE are defined in the shared + * network.h. */ typedef struct rx_shadow_entry_st @@ -53,10 +54,10 @@ typedef struct net_vif_st { net_idx_t *shared_idxs; /* The private rings and indexes. */ - rx_shadow_entry_t rx_shadow_ring[RX_RING_SIZE]; + rx_shadow_entry_t rx_shadow_ring[XENNET_RX_RING_SIZE]; NET_RING_IDX rx_prod; /* More buffers for filling go here. */ NET_RING_IDX rx_cons; /* Next buffer to fill is here. */ - tx_shadow_entry_t tx_shadow_ring[TX_RING_SIZE]; + tx_shadow_entry_t tx_shadow_ring[XENNET_TX_RING_SIZE]; NET_RING_IDX tx_prod; /* More packets for sending go here. */ NET_RING_IDX tx_cons; /* Next packet to send is here. 
*/ diff --git a/xen/net/dev.c b/xen/net/dev.c index 1bd9120672..e4d791a585 100644 --- a/xen/net/dev.c +++ b/xen/net/dev.c @@ -1869,7 +1869,7 @@ static int get_tx_bufs(net_vif_t *vif) again: for ( i = vif->tx_req_cons; (i != shared_idxs->tx_req_prod) && - ((i-vif->tx_resp_prod) != TX_RING_SIZE); + ((i-vif->tx_resp_prod) != XENNET_TX_RING_SIZE); i++ ) { tx = shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req; @@ -2061,7 +2061,7 @@ static void get_rx_bufs(net_vif_t *vif) j = vif->rx_prod; for ( i = vif->rx_req_cons; (i != shared_idxs->rx_req_prod) && - ((i-vif->rx_resp_prod) != RX_RING_SIZE); + ((i-vif->rx_resp_prod) != XENNET_RX_RING_SIZE); i++ ) { rx = shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req; @@ -2193,7 +2193,7 @@ long flush_bufs_for_vif(net_vif_t *vif) spin_lock(&vif->rx_lock); for ( i = vif->rx_req_cons; (i != shared_idxs->rx_req_prod) && - ((i-vif->rx_resp_prod) != RX_RING_SIZE); + ((i-vif->rx_resp_prod) != XENNET_RX_RING_SIZE); i++ ) { make_rx_response(vif, shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req.id, @@ -2242,7 +2242,7 @@ long flush_bufs_for_vif(net_vif_t *vif) spin_lock(&vif->tx_lock); for ( i = vif->tx_req_cons; (i != shared_idxs->tx_req_prod) && - ((i-vif->tx_resp_prod) != TX_RING_SIZE); + ((i-vif->tx_resp_prod) != XENNET_TX_RING_SIZE); i++ ) { make_tx_response(vif, shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req.id, diff --git a/xenolinux-2.4.25-sparse/arch/xeno/config.in b/xenolinux-2.4.25-sparse/arch/xeno/config.in index 39e821556a..209bbe6d51 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/config.in +++ b/xenolinux-2.4.25-sparse/arch/xeno/config.in @@ -107,6 +107,10 @@ bool 'Networking support' CONFIG_NET bool 'PCI support' CONFIG_PCI if [ "$CONFIG_PCI" = "y" ]; then tristate ' 3c590/3c900 series (592/595/597) "Vortex/Boomerang" support' CONFIG_VORTEX + tristate 'Intel(R) PRO/1000 Gigabit Ethernet support' CONFIG_E1000 + if [ "$CONFIG_E1000" != "n" ]; then + bool ' Use Rx Polling (NAPI)' CONFIG_E1000_NAPI + fi fi source drivers/pci/Config.in diff 
--git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c index 0a1bce2bfa..c5d25442e2 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c @@ -58,8 +58,8 @@ struct net_private * {tx,rx}_skbs store outstanding skbuffs. The first entry in each * array is an index into a chain of free entries. */ - struct sk_buff *tx_skbs[TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[RX_RING_SIZE+1]; + struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; }; /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ @@ -143,9 +143,9 @@ static int network_open(struct net_device *dev) memset(np->net_idx, 0, sizeof(*np->net_idx)); /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ - for ( i = 0; i <= TX_RING_SIZE; i++ ) + for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) np->tx_skbs[i] = (void *)(i+1); - for ( i = 0; i <= RX_RING_SIZE; i++ ) + for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) np->rx_skbs[i] = (void *)(i+1); wmb(); @@ -196,7 +196,8 @@ static void network_tx_buf_gc(struct net_device *dev) } while ( prod != np->net_idx->tx_resp_prod ); - if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) ) + if ( np->tx_full && + ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) { np->tx_full = 0; if ( np->state == STATE_ACTIVE ) @@ -223,7 +224,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) netop_t netop; NET_RING_IDX i = np->net_idx->rx_req_prod; - if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || + if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || unlikely(np->state != STATE_ACTIVE) ) return; @@ -246,7 +247,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) np->rx_bufs_to_notify++; } - while ( (++i - np->rx_resp_cons) != RX_RING_SIZE ); + while ( (++i - np->rx_resp_cons) != 
XENNET_RX_RING_SIZE ); /* * We may have allocated buffers which have entries outstanding in the page @@ -258,7 +259,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) np->net_idx->rx_event = np->rx_resp_cons + 1; /* Batch Xen notifications. */ - if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) ) + if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) { netop.cmd = NETOP_PUSH_BUFFERS; netop.vif = np->idx; @@ -313,7 +314,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) network_tx_buf_gc(dev); - if ( (i - np->tx_resp_cons) == (TX_RING_SIZE - 1) ) + if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) { np->tx_full = 1; netif_stop_queue(dev); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c index 465dd18233..91f3c5c17e 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c @@ -58,8 +58,8 @@ struct net_private * {tx,rx}_skbs store outstanding skbuffs. The first entry in each * array is an index into a chain of free entries. */ - struct sk_buff *tx_skbs[TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[RX_RING_SIZE+1]; + struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1]; }; /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ @@ -143,9 +143,9 @@ static int network_open(struct net_device *dev) memset(np->net_idx, 0, sizeof(*np->net_idx)); /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
*/ - for ( i = 0; i <= TX_RING_SIZE; i++ ) + for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ ) np->tx_skbs[i] = (void *)(i+1); - for ( i = 0; i <= RX_RING_SIZE; i++ ) + for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ ) np->rx_skbs[i] = (void *)(i+1); wmb(); @@ -196,7 +196,8 @@ static void network_tx_buf_gc(struct net_device *dev) } while ( prod != np->net_idx->tx_resp_prod ); - if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) ) + if ( np->tx_full && + ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) ) { np->tx_full = 0; if ( np->state == STATE_ACTIVE ) @@ -223,7 +224,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) netop_t netop; NET_RING_IDX i = np->net_idx->rx_req_prod; - if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || + if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || unlikely(np->state != STATE_ACTIVE) ) return; @@ -246,7 +247,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) np->rx_bufs_to_notify++; } - while ( (++i - np->rx_resp_cons) != RX_RING_SIZE ); + while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); /* * We may have allocated buffers which have entries outstanding in the page @@ -258,7 +259,7 @@ static void network_alloc_rx_buffers(struct net_device *dev) np->net_idx->rx_event = np->rx_resp_cons + 1; /* Batch Xen notifications. 
*/ - if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) ) + if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) ) { netop.cmd = NETOP_PUSH_BUFFERS; netop.vif = np->idx; @@ -313,7 +314,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) network_tx_buf_gc(dev); - if ( (i - np->tx_resp_cons) == (TX_RING_SIZE - 1) ) + if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) ) { np->tx_full = 1; netif_stop_queue(dev); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c index 64a3590362..7c6aca05c5 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/kernel/hypervisor.c @@ -7,8 +7,9 @@ */ #include -#include #include +#include +#include #include #include #include @@ -18,6 +19,40 @@ int nr_multicall_ents = 0; static unsigned long event_mask = 0; +asmlinkage unsigned int do_physirq(int irq, struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long irqs; + shared_info_t *shared = HYPERVISOR_shared_info; + + /* do this manually */ + kstat.irqs[cpu][irq]++; + ack_hypervisor_event(irq); + + barrier(); + irqs = xchg(&shared->physirq_pend, 0); + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 3f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "3: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (irqs), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* do this manually */ + end_hypervisor_event(irq); + + return 0; +} + void do_hypervisor_callback(struct pt_regs *regs) { unsigned long events, flags; @@ -32,6 +67,12 @@ void do_hypervisor_callback(struct pt_regs *regs) events = xchg(&shared->events, 0); events &= event_mask; + if ( (events & EVENT_PHYSIRQ) != 0 ) + { + do_physirq(_EVENT_PHYSIRQ, regs); + events &= 
~EVENT_PHYSIRQ; + } + __asm__ __volatile__ ( " push %1 ;" " sub $4,%%esp ;" diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c index e62ca85c04..6744999039 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/kernel/i386_ksyms.c @@ -9,7 +9,7 @@ #include #include #include -//XXX ??? #include +#include #include #include #include @@ -68,6 +68,8 @@ EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(apm_info); //EXPORT_SYMBOL(gdt); EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(phys_to_machine_mapping); + #ifdef CONFIG_DEBUG_IOVIRT EXPORT_SYMBOL(__io_virt_debug); @@ -101,6 +103,16 @@ EXPORT_SYMBOL(__generic_copy_from_user); EXPORT_SYMBOL(__generic_copy_to_user); EXPORT_SYMBOL(strnlen_user); + +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); + +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pcibios_penalize_isa_irq); +EXPORT_SYMBOL(pci_mem_start); +#endif + + #ifdef CONFIG_X86_USE_3DNOW EXPORT_SYMBOL(_mmx_memcpy); EXPORT_SYMBOL(mmx_clear_page); diff --git a/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c b/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c index 3a6083b24e..1f7a8e4fee 100644 --- a/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c +++ b/xenolinux-2.4.25-sparse/arch/xeno/kernel/physirq.c @@ -38,7 +38,9 @@ static unsigned int startup_physirq_event(unsigned int irq) printk("startup_physirq_event %d\n", irq); /* - * install a interrupt handler for physirq event when called thefirst tim + * install a interrupt handler for physirq event when called first time + * we actually are never executing the handler as _EVENT_PHYSIRQ is + * handled specially in hypervisor.c But we need to enable the event etc. */ if ( !setup_event_handler ) { @@ -66,23 +68,51 @@ static unsigned int startup_physirq_event(unsigned int irq) } return 0; } +/* + * This is a dummy interrupt handler. + * It should never be called. 
events for physical interrupts are handled + * differently in hypervisor.c + */ +static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + printk("XXX This should never be called!"); +} + +/* + * IRQ is not needed anymore. + */ static void shutdown_physirq_event(unsigned int irq) { + physdev_op_t op; + int err; - /* call xen to free IRQ */ + printk("shutdown_phys_irq called."); + /* + * tell hypervisor + */ + op.cmd = PHYSDEVOP_FREE_IRQ; + op.u.free_irq.irq = irq; + if ( (err = HYPERVISOR_physdev_op(&op)) != 0 ) + { + printk(KERN_ALERT "could not free IRQ %d\n", irq); + return; + } + return; } static void enable_physirq_event(unsigned int irq) { - /* XXX just enable all interrupts for now */ + /* XXX just enable all phys interrupts for now */ + enable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); } static void disable_physirq_event(unsigned int irq) { - /* XXX just disable all interrupts for now */ + /* XXX just disable all phys interrupts for now */ + disable_irq(HYPEREVENT_IRQ(_EVENT_PHYSIRQ)); } static void ack_physirq_event(unsigned int irq) @@ -100,6 +130,7 @@ static void end_physirq_event(unsigned int irq) { int err; physdev_op_t op; + /* call hypervisor */ op.cmd = PHYSDEVOP_FINISHED_IRQ; op.u.finished_irq.irq = irq; @@ -123,21 +154,6 @@ static struct hw_interrupt_type physirq_irq_type = { }; -/* - * this interrupt handler demuxes the virt phys event and the virt phys - * bitmask and calls the interrupt handlers for virtualised physical interrupts - */ -static void physirq_interrupt(int irq, void *unused, struct pt_regs *ptregs) -{ -#if 0 - unsigned long flags; - int virq; - local_irq_save(flags); - do_IRQ(virq); - local_irq_restore(flags); -#endif -} - void __init physirq_init(void) { diff --git a/xenolinux-2.4.25-sparse/include/asm-xeno/io.h b/xenolinux-2.4.25-sparse/include/asm-xeno/io.h new file mode 100644 index 0000000000..3d78e20950 --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xeno/io.h @@ -0,0 +1,430 @@ +#ifndef _ASM_IO_H 
+#define _ASM_IO_H + +#include + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ + +#define IO_SPACE_LIMIT 0xffff + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ +#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ + +#ifdef __KERNEL__ + +#include + +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. 
+ */ +#if CONFIG_DEBUG_IOVIRT + extern void *__io_virt_debug(unsigned long x, const char *file, int line); + extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); + #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) +//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) +#else + #define __io_virt(x) ((void *)(x)) +//#define __io_phys(x) __pa(x) +#endif + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. 
+ */ +#ifdef CONFIG_HIGHMEM64G +#define page_to_phys(page) ((u64)(page - mem_map) << PAGE_SHIFT) +#else +#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) +#endif + +extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ + +static inline void * ioremap (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, 0); +} + +/** + * ioremap_nocache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncacheable + * on the CPU as well as honouring existing caching rules from things like + * the PCI bus. Note that there are other caches and buffers on many + * busses. In particular, driver authors should read up on PCI writes. + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + */ + +static inline void * ioremap_nocache (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, _PAGE_PCD); +} + +extern void iounmap(void *addr); + +/* + * bt_ioremap() and bt_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. 
+ * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void *bt_ioremap(unsigned long offset, unsigned long size); +extern void bt_iounmap(void *addr, unsigned long size); + +/* + * IO bus memory addresses are machine addresses, not 1:1 with physical ones + */ +#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) +#define page_to_bus(_x) phys_to_machine(page_to_phys(_x)) + +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the x86 architecture, we just read/write the + * memory location directly. + */ + +#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) +#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) +#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) +#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) +#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) +#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) +#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. 
The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) +#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) +#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) +#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) + + +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) +#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/** + * isa_check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the ISA mmio address io_addr. + * Returns 1 on a match. + * + * This function is deprecated. New drivers should use ioremap and + * check_signature. 
+ */ + + +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); +} + +#define dma_cache_inv(_start,_size) flush_write_buffers() +#define dma_cache_wback(_start,_size) flush_write_buffers() +#define dma_cache_wback_inv(_start,_size) flush_write_buffers() + +#else + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) +#define flush_write_buffers() + +#endif + +#endif /* __KERNEL__ */ + +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#else +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + +#ifdef CONFIG_MULTIQUAD +extern void *xquad_portio; /* Where the IO area was mapped */ +#endif /* CONFIG_MULTIQUAD */ + +/* + * Talk about misusing macros.. 
+ */
+/*
+ * __OUT1 opens the definition of out##sfx(value, port) and __OUT2 opens
+ * its asm statement.  Both are deliberately left unterminated: the
+ * __OUT macro supplies the operands and the closing ");" and "}".
+ */
+#define __OUT1(sfx,type) \
+static inline void out##sfx(unsigned type value, unsigned short port) {
+
+#define __OUT2(insn,vmod,pmod) \
+__asm__ __volatile__ ("out" #insn " %" vmod "0,%" pmod "1"
+
+#if defined(CONFIG_MULTIQUAD) && !defined(STANDALONE)
+
+/*
+ * Do the equivalent of the portio op on quads.
+ *
+ * While xquad_portio is unset (still in early boot, running on quad 0)
+ * the plain accessors fall back to the _local port instruction;
+ * afterwards they go through write##wsfx()/read##rsfx() on the mapped
+ * area.  The _quad flavours address a given quad at offset
+ * XQUAD_PORTIO_QUAD*quad and do nothing (or return 0) while
+ * xquad_portio is unset.
+ */
+#define __OUTQ(wsfx,osfx,type) \
+static inline void out##osfx(unsigned type value, unsigned short port) { \
+	if (!xquad_portio) { \
+		out##osfx##_local(value, port); \
+	} else { \
+		write##wsfx(value, (unsigned long) xquad_portio + port); \
+	} \
+} \
+static inline void out##osfx##_quad(unsigned type value, unsigned short port, int quad) { \
+	if (!xquad_portio) \
+		return; \
+	write##wsfx(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port); \
+}
+
+#define __INQ(rsfx,isfx) \
+static inline RETURN_TYPE in##isfx(unsigned short port) { \
+	if (!xquad_portio) { \
+		return in##isfx##_local(port); \
+	} else { \
+		return read##rsfx((unsigned long) xquad_portio + port); \
+	} \
+} \
+static inline RETURN_TYPE in##isfx##_quad(unsigned short port, int quad) { \
+	if (!xquad_portio) \
+		return 0; \
+	return read##rsfx((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port); \
+}
+
+/* Make the default portio routines operate on quad 0 */
+#define __OUT(sfx,vmod,type) \
+__OUT1(sfx##_local,type) __OUT2(sfx,vmod,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(sfx##_p_local,type) __OUT2(sfx,vmod,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUTQ(sfx,sfx,type) \
+__OUTQ(sfx,sfx##_p,type)
+
+#else /* !CONFIG_MULTIQUAD || STANDALONE */
+
+/* Plain and pausing (_p) output routines built directly on the port insn. */
+#define __OUT(sfx,vmod,type) \
+__OUT1(sfx,type) __OUT2(sfx,vmod,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(sfx##_p,type) __OUT2(sfx,vmod,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));}
+
+#endif /* CONFIG_MULTIQUAD && !STANDALONE */
+
+/*
+ * Opens the definition of in##sfx(port) and declares its result _v;
+ * RETURN_TYPE must be defined at the point of expansion.
+ */
+#define __IN1(sfx) \
+static inline RETURN_TYPE in##sfx(unsigned short port) { RETURN_TYPE _v;
+/*
+ * __IN2 opens the asm statement of an input routine; like __IN1 it is
+ * left unterminated and is closed by __IN, which adds the operands,
+ * any extra input operands passed as i..., and "return _v; }".
+ */
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE)
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; }
+#else
+/* Make the default portio routines operate on quad 0 */
+#define __IN(s,s1,i...) \
+__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__INQ(s,s) \
+__INQ(s,s##_p)
+#endif /* !CONFIG_MULTIQUAD || STANDALONE */
+
+/*
+ * String port I/O.  "rep ins" stores to, and "rep outs" loads from, the
+ * buffer at addr without the compiler seeing the access, so both asm
+ * statements carry a "memory" clobber: it stops buffer contents being
+ * kept in registers or buffer accesses being moved across the transfer.
+ * addr and count are tied to the registers the instruction updates.
+ */
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count) : "memory"); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count) : "memory"); }
+
+/* Generate the byte, word and long input routines. */
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+/* Generate the byte, word and long output routines. */
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+#endif
diff --git a/xenolinux-2.4.25-sparse/mkbuildtree b/xenolinux-2.4.25-sparse/mkbuildtree
index 1a5182a50b..64818b45da 100755
--- a/xenolinux-2.4.25-sparse/mkbuildtree
+++ b/xenolinux-2.4.25-sparse/mkbuildtree
@@ -137,8 +137,7 @@ ln -sf ../asm-i386/hardirq.h
 ln -sf ../asm-i386/hdreg.h
 ln -sf ../asm-i386/i387.h
 ln -sf ../asm-i386/ide.h
-ln -sf ../asm-i386/init.h
-ln -sf ../asm-i386/io.h
+ln -sf ../asm-i386/init.h
 ln -sf ../asm-i386/io_apic.h
 ln -sf ../asm-i386/ioctl.h
 ln -sf ../asm-i386/ioctls.h
-- 
2.30.2